In [ ]:
import pandas as pd

from planet4 import region_data, io, stats

per region


In [ ]:
def process_region(args):
    """Collect the completion status of every image of one region and season.

    The function is dispatched through ``lbview.map_async``, so everything it
    needs is imported inside the function body.

    Parameters
    ----------
    args : tuple
        ``(regionclass, season)``. ``season`` is the name of the attribute on
        ``regionclass`` that holds the image names for that season.

    Returns
    -------
    pandas.DataFrame or None
        One row per image name with the columns ``image_name``,
        ``completion``, ``season`` and ``region`` (the class name of
        ``regionclass``). ``None`` is returned, after printing a message,
        when ``regionclass`` has no attribute called ``season``.
    """
    # Imported locally: a `pd` alias that only exists in the notebook is not
    # available where this function is executed remotely.
    import pandas as pd
    from planet4 import io, stats

    regionclass, season = args
    try:
        image_names = getattr(regionclass, season)
    except AttributeError:
        print("No data for that season found.")
        return None

    results = []
    for image_name in image_names:
        df = io.DBManager(s23=False).get_image_name_markings(image_name)
        results.append(stats.get_status_per_classifications(df))

    resdf = pd.DataFrame(dict(image_name=image_names, completion=results))
    resdf['season'] = season
    resdf['region'] = regionclass.__name__
    return resdf

In [ ]:
def get_process_list():
    """Build the work list of ``(region, season)`` tuples.

    Seasons run from ``'season1'`` to ``'season4'``. Every entry of
    ``region_data.regions`` is paired with each season, with all regions of
    one season listed before the next season starts.
    """
    season_names = ['season{}'.format(number) for number in (1, 2, 3, 4)]
    return [
        (region, season_name)
        for season_name in season_names
        for region in region_data.regions
    ]

In [ ]:
get_process_list()

In [ ]:
from ipyparallel import Client
c = Client()
lbview = c.load_balanced_view()

In [ ]:
from nbtools import display_multi_progress

In [ ]:
# NOTE(review): `sys` is not used anywhere in this cell.
import sys
# Build the (region, season) work list and submit one process_region task per
# entry through the load-balanced view.
processlist = get_process_list()
results = lbview.map_async(process_region, processlist)
# NOTE(review): presumably renders progress while the tasks run -- confirm
# against nbtools.
display_multi_progress(results, processlist)

In [ ]:
# Stack the per-task frames returned by process_region into a single table;
# ignore_index=True replaces the per-frame indices with one fresh running index.
statusdf = pd.concat(results.result(), ignore_index=True)

In [ ]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [ ]:
statusdf[statusdf.completion < 110].hist(bins=50)

In [ ]:
get_process_list()

In [ ]:
statusdf[statusdf.completion < 95]

Per image_name (= HiRISE obs_id)


In [ ]:
from planet4 import io

In [ ]:
# image_names = io.get_all_image_names()
# image_names = image_names.image_name.unique()
# with open('current_image_names.pkl', 'wb') as f:
#     pickle.dump(image_names, f)

In [ ]:
image_names = io.get_image_names_from_db(io.get_current_database_fname())

In [ ]:
from planet4 import helper_functions as hf

In [ ]:
def process_image_name(image_name):
    """Return ``(image_name, status)`` for one image name.

    The data for ``image_name`` is loaded with ``io.get_image_name_data`` and
    passed to ``hf.get_status_per_classifications``; whatever that call
    returns is the second element of the tuple.

    NOTE(review): the following cell defines ``process_image_name`` again, so
    this version is replaced as soon as that cell has run.
    """
    from planet4 import helper_functions as hf
    from planet4 import io
    return image_name, hf.get_status_per_classifications(
        io.get_image_name_data(image_name))

In [ ]:
def process_image_name(image_name):
    """Build the ``no_done`` record for one image name.

    The data for ``image_name`` is loaded with ``io.get_image_name_data`` and
    handed to ``hf.get_no_tiles_done``. The result is returned as a dict with
    the keys ``'image_name'`` and ``'no_done'``.
    """
    from planet4 import helper_functions as hf
    from planet4 import io
    image_data = io.get_image_name_data(image_name)
    return dict(image_name=image_name,
                no_done=hf.get_no_tiles_done(image_data))

In [ ]:
from ipyparallel import Client
c = Client()
lbview = c.load_balanced_view()

In [ ]:
results = lbview.map_async(process_image_name, image_names)

In [ ]:
for result in results:
    print(result)

In [ ]:
# Each task returns one dict with the keys 'image_name' and 'no_done', so the
# list of results becomes one row per image name.
# `get()` waits for the tasks and returns the list of their results; the bare
# attribute `results.result` (no call) is a bound method on current
# ipyparallel, which pd.DataFrame cannot consume.
df = pd.DataFrame(results.get())
df = df.set_index('image_name')
df

In [ ]:
df.no_done.sum()

In [ ]:
%matplotlib nbagg

In [ ]:
# Remove the row labelled 'tutorial'. errors='ignore' keeps this cell
# re-runnable: without it a second run raises KeyError because the label is
# already gone.
df = df.drop('tutorial', errors='ignore')

In [ ]:
df.plot(kind='hist',bins=30)

In [ ]:
df = pd.read_hdf('image_name_status.h5','df')

In [ ]:
df.head()

Figure for HiRISE meeting


In [ ]:
# Marking statistics and top-ten-user bar charts, saved as PNG files.
try:
    from importlib import reload  # Python 3; on Python 2 `reload` is a builtin
except ImportError:
    pass

# NOTE(review): the other cells use `from planet4 import helper_functions as hf`;
# confirm which of the two module locations is the intended one.
import helper_functions as hf
reload(hf)

pd.__version__

df = hf.get_current_cleaned()

# Ten largest entries of the per-user classification counts.
# `Series.order()` no longer exists in pandas; `sort_values()` replaces it.
users_work = hf.classification_counts_per_user(df)
topten = users_work.sort_values(ascending=False)[:10]

topten

hf.classification_counts_for_user('Kitharode', df)

# Per-user breakdown of marking types for the ten users selected above.
for user in topten.index:
    print(user)
    print(df[df.user_name == user].marking.value_counts())

s = df.marking.value_counts()

# Use the Axes returned by pandas instead of the pylab globals
# `title`/`savefig`, which are never imported in this notebook.
ax = s.plot(kind='bar')
ax.set_title('Marking stats')

ax.figure.savefig("marking_stats.png", dpi=200)

df.columns

no_nones = df[df.marking != 'None']

hf.get_top_ten_users(no_nones)

topten = hf.get_top_ten_users(df)

ax = topten.plot(kind='bar')
ax.set_title("Top ten citizens, submissions")
ax.figure.savefig('top_ten_submitters.png', dpi=200)